{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "from os.path import dirname, realpath, join\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# base_dir is two directory levels above the current working directory\n",
    "# (presumably the repository root that holds config_path -- confirm).\n",
    "# It is placed first on sys.path so the config_path import below resolves.\n",
    "base_dir = dirname(dirname(os.getcwd()))\n",
    "sys.path.insert(0, base_dir)\n",
    "from config_path import PROSTATE_DATA_PATH, PLOTS_PATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the mutation matrix; the first CSV column becomes the row index.\n",
    "filename = join(\n",
    "    PROSTATE_DATA_PATH,\n",
    "    'processed/P1000_final_analysis_set_cross_important_only.csv',\n",
    ")\n",
    "mut_df = pd.read_csv(filename, index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A1BG</th>\n",
       "      <th>A1CF</th>\n",
       "      <th>A2M</th>\n",
       "      <th>A2ML1</th>\n",
       "      <th>A4GALT</th>\n",
       "      <th>A4GNT</th>\n",
       "      <th>AAAS</th>\n",
       "      <th>AACS</th>\n",
       "      <th>AADAC</th>\n",
       "      <th>AADACL3</th>\n",
       "      <th>...</th>\n",
       "      <th>ZW10</th>\n",
       "      <th>ZWILCH</th>\n",
       "      <th>ZWINT</th>\n",
       "      <th>ZXDA</th>\n",
       "      <th>ZXDB</th>\n",
       "      <th>ZXDC</th>\n",
       "      <th>ZYG11B</th>\n",
       "      <th>ZYX</th>\n",
       "      <th>ZZEF1</th>\n",
       "      <th>ZZZ3</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tumor_Sample_Barcode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>00-029N9_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-087MM_BONE</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-095N1_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01-120A1_LIVER</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>02-083E1_LN</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 14378 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                      A1BG  A1CF  A2M  A2ML1  A4GALT  A4GNT  AAAS  AACS  \\\n",
       "Tumor_Sample_Barcode                                                      \n",
       "00-029N9_LN            0.0   0.0  0.0    0.0     0.0    0.0   0.0   0.0   \n",
       "01-087MM_BONE          0.0   0.0  0.0    0.0     0.0    0.0   0.0   0.0   \n",
       "01-095N1_LN            0.0   0.0  0.0    0.0     0.0    0.0   0.0   0.0   \n",
       "01-120A1_LIVER         0.0   0.0  0.0    0.0     0.0    0.0   0.0   0.0   \n",
       "02-083E1_LN            0.0   0.0  0.0    0.0     0.0    0.0   0.0   0.0   \n",
       "\n",
       "                      AADAC  AADACL3  ...   ZW10  ZWILCH  ZWINT  ZXDA  ZXDB  \\\n",
       "Tumor_Sample_Barcode                  ...                                     \n",
       "00-029N9_LN             0.0      0.0  ...    0.0     0.0    0.0   0.0   0.0   \n",
       "01-087MM_BONE           0.0      0.0  ...    0.0     0.0    0.0   0.0   0.0   \n",
       "01-095N1_LN             0.0      0.0  ...    0.0     0.0    0.0   0.0   0.0   \n",
       "01-120A1_LIVER          0.0      0.0  ...    0.0     0.0    0.0   0.0   0.0   \n",
       "02-083E1_LN             0.0      0.0  ...    0.0     0.0    0.0   0.0   0.0   \n",
       "\n",
       "                      ZXDC  ZYG11B  ZYX  ZZEF1  ZZZ3  \n",
       "Tumor_Sample_Barcode                                  \n",
       "00-029N9_LN            0.0     0.0  0.0    0.0   0.0  \n",
       "01-087MM_BONE          0.0     0.0  0.0    0.0   0.0  \n",
       "01-095N1_LN            0.0     0.0  0.0    0.0   0.0  \n",
       "01-120A1_LIVER         0.0     0.0  0.0    0.0   0.0  \n",
       "02-083E1_LN            0.0     0.0  0.0    0.0   0.0  \n",
       "\n",
       "[5 rows x 14378 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mut_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "60935.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mut_df.sum().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "21.0"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "np.max(mut_df.values.ravel())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reshape the wide matrix to long form: one row per matrix entry, with the\n",
    "# column label in `variable` and the entry in `value` (the row index is not kept).\n",
    "df_long = mut_df.melt()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A1BG</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  variable  value\n",
       "0     A1BG    0.0\n",
       "1     A1BG    0.0\n",
       "2     A1BG    0.0\n",
       "3     A1BG    0.0\n",
       "4     A1BG    0.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEUCAYAAADDdzb+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHMhJREFUeJzt3X+YHVWd5/H3hwQQggQiDYMEDDsEAXWJkA1xQUVgQkAHcIEH8AcBcTMqDMyOsxJGH3FEZsBxh5FVwCiRgEIEFAgIEyIQcYRAAoT8IGBaQBKDECcQUBzdwHf/OKdJ0dzO7W7qdrrv+bye5z637qlT9T11b/W3qqtOVSkiMDOzsmy2qRtgZmYDz8nfzKxATv5mZgVy8jczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFWj4pm5Af+2www4xZsyYTd0MM7NB44EHHvhtRHT0pu6QTf5jxoxh4cKFm7oZZmaDhqRf9bauD/uYmRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAg3Zi7x6Mmbaj/s13ZMXfLDmlpiZDV7e8zczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCOfmbmRXIyd/MrEBO/mZmBepV8pf0pKQlkhZJWpjLRkmaK2lFft8+l0vSxZI6JS2WtF9lPlNy/RWSplTK98/z78zTqu4FNTOzDfqy5/+BiBgXEePz52nAHRExFrgjfwY4AhibX1OBSyFtLIBzgQOACcC5XRuMXGdqZbrJ/V4iMzNr6o0c9jkamJmHZwLHVMqvjGQ+sJ2knYHDgbkRsTYingPmApPzuG0j4t6ICODKyrzMzKwFepv8A7hd0gOSpuaynSLiaYD8vmMu3wVYWZl2VS7bWPmqBuVmZtYivX2A+4ERsVrSjsBcSY9upG6j4/XRj/LXzzhteKYC7LbbbhtvsZmZ9ahXe/4RsTq/PwvcQDpm/0w+ZEN+fzZXXwXsWpl8NLC6SfnoBuWN2jE9IsZHxPiOjo7eNN3MzBpomvwljZD05q5hYBKwFJgNdPXYmQLclIdnAyfnXj8TgXX5sNAcYJKk7fOJ3knAnDzuRUkTcy+fkyvzMjOzFujNYZ+dgBty78vhwNUR8W+SFgDXSjoNeAo4Pte/FTgS6AReAk4FiIi1ks4DFuR6X46ItXn408AVwFbAbfllZmYt0jT5R8TjwL4Nyv8DOLRBeQCn9zCvGcCMBuULgXf2or1mZlYDX+FrZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCOfmbmRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAjn5m5kVyMnfzKxATv5mZgVy8jczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQL1O/pKGSXpI0i358+6S7pO0QtIPJG2Ry7fMnzvz+DGVeZyTyx+TdHilfHIu65Q0rb7FMzOzRvqy538WsLzy+ULgoogYCzwHnJbLTwOei4g9gItyPSTtA5wIvAOYDFySNyjDgG8CRwD7ACflumZm1iK9Sv6SRgMfBL6TPws4BLg+V5kJHJOHj86fyeMPzfWPBmZFxB8j4gmgE5iQX50R8XhE/AmYleuamVmL9HbP/1+BzwGv5M9vAZ6PiPX58ypglzy8C7ASII9fl+u/Wt5tmp7KzcysRZomf0kfAp6NiAeqxQ2qRpNxfS1v1JapkhZKWrhmzZqNtNrMzDamN3v+BwJHSXqSdEjmENJ/AttJGp7rjAZW5+FVwK4AefxIYG21vNs0PZW/TkRMj4jxETG+o6OjF003M7NGmib/iDgnIkZHxBjSCds7I+KjwF3AcbnaFOCmPDw7
fyaPvzMiIpefmHsD7Q6MBe4HFgBjc++hLXKM2bUsnZmZNTS8eZUenQ3MkvQV4CHg8lx+OXCVpE7SHv+JABGxTNK1wCPAeuD0iHgZQNIZwBxgGDAjIpa9gXaZmVkTfUr+ETEPmJeHHyf11Ole5z+B43uY/nzg/AbltwK39qUtZmbWf77C18ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCOfmbmRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAjn5m5kVyMnfzKxATv5mZgVy8jczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGaJn9Jb5J0v6SHJS2T9A+5fHdJ90laIekHkrbI5Vvmz515/JjKvM7J5Y9JOrxSPjmXdUqaVv9implZVW/2/P8IHBIR+wLjgMmSJgIXAhdFxFjgOeC0XP804LmI2AO4KNdD0j7AicA7gMnAJZKGSRoGfBM4AtgHOCnXNTOzFmma/CP5Xf64eX4FcAhwfS6fCRyTh4/On8njD5WkXD4rIv4YEU8AncCE/OqMiMcj4k/ArFzXzMxapFfH/PMe+iLgWWAu8Evg+YhYn6usAnbJw7sAKwHy+HXAW6rl3abpqdzMzFqkV8k/Il6OiHHAaNKe+t6NquV39TCur+WvI2mqpIWSFq5Zs6Z5w83MrKE+9faJiOeBecBEYDtJw/Oo0cDqPLwK2BUgjx8JrK2Wd5ump/JG8adHxPiIGN/R0dGXppuZWUVvevt0SNouD28FHAYsB+4CjsvVpgA35eHZ+TN5/J0REbn8xNwbaHdgLHA/sAAYm3sPbUE6KTy7joUzM7PGhjevws7AzNwrZzPg2oi4RdIjwCxJXwEeAi7P9S8HrpLUSdrjPxEgIpZJuhZ4BFgPnB4RLwNIOgOYAwwDZkTEstqW0MzMXqdp8o+IxcC7G5Q/Tjr+3738P4Hje5jX+cD5DcpvBW7tRXvNzKwGvsLXzKxATv5mZgVy8jczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCOfmbmRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAjn5m5kVyMnfzKxATv5mZgVy8jczK5CTv5lZgZomf0m7SrpL0nJJyySdlctHSZoraUV+3z6XS9LFkjolLZa0X2VeU3L9FZKmVMr3l7QkT3OxJLViYc3MLOnNnv964LMRsTcwEThd0j7ANOCOiBgL3JE/AxwBjM2vqcClkDYWwLnAAcAE4NyuDUauM7Uy3eQ3vmhmZtaTpsk/Ip6OiAfz8IvAcmAX4GhgZq42EzgmDx8NXBnJfGA7STsDhwNzI2JtRDwHzAUm53HbRsS9ERHAlZV5mZlZC/TpmL+kMcC7gfuAnSLiaUgbCGDHXG0XYGVlslW5bGPlqxqUm5lZi/Q6+UvaBvgh8DcR8cLGqjYoi36UN2rDVEkLJS1cs2ZNsyabmVkPepX8JW1OSvzfj4gf5eJn8iEb8vuzuXwVsGtl8tHA6ibloxuUv05ETI+I8RExvqOjozdNNzOzBnrT20fA5cDyiPiXyqjZQFePnSnATZXyk3Ovn4nAunxYaA4wSdL2+UTvJGBOHveipIk51smVeZmZWQsM70WdA4GPA0skLcplfw9cAFwr6TTgKeD4PO5W4EigE3gJOBUgItZKOg9YkOt9OSLW5uFPA1cAWwG35ZeZmbVI0+QfEf9O4+PyAIc2qB/A6T3MawYwo0H5QuCdzdpiZmb18BW+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCOfmbmRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAjn5m5kVyMnfzKxATv5mZgVy8jczK5CTv5lZgZz8
zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCtQ0+UuaIelZSUsrZaMkzZW0Ir9vn8sl6WJJnZIWS9qvMs2UXH+FpCmV8v0lLcnTXCxJdS+kmZm9Vm/2/K8AJncrmwbcERFjgTvyZ4AjgLH5NRW4FNLGAjgXOACYAJzbtcHIdaZWpusey8zMatY0+UfE3cDabsVHAzPz8EzgmEr5lZHMB7aTtDNwODA3ItZGxHPAXGByHrdtRNwbEQFcWZmXmZm1SH+P+e8UEU8D5Pcdc/kuwMpKvVW5bGPlqxqUm5lZC9V9wrfR8froR3njmUtTJS2UtHDNmjX9bKKZmfU3+T+TD9mQ35/N5auAXSv1RgOrm5SPblDeUERMj4jxETG+o6Ojn003M7P+Jv/ZQFePnSnATZXyk3Ovn4nAunxYaA4wSdL2+UTvJGBOHveipIm5l8/JlXmZmVmLDG9WQdI1wMHADpJWkXrtXABcK+k04Cng+Fz9VuBIoBN4CTgVICLWSjoPWJDrfTkiuk4if5rUo2gr4Lb8MjOzFmqa/CPipB5GHdqgbgCn9zCfGcCMBuULgXc2a4eZmdXHV/iamRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmBnPzNzArk5G9mViAnfzOzAjn5m5kVyMnfzKxATv5mZgVy8jczK5CTv5lZgZz8zcwK5ORvZlYgJ38zswI5+ZuZFcjJ38ysQE7+ZmYFcvI3MyuQk7+ZWYGc/M3MCuTkb2ZWICd/M7MCDd/UDRjyvjSyH9Osq78dZmZ94D1/M7MCOfmbmRXIyd/MrECDJvlLmizpMUmdkqZt6vaYmbWzQZH8JQ0DvgkcAewDnCRpn03bKjOz9jVYevtMADoj4nEASbOAo4FHNmmrBpl3zXxXn6dZMmVJC1piZkPdYEn+uwArK59XAQdsorYYsHyvvfs13d6PLu/zNN/81J39inX6ZYf0a7r/c8KH+jzNZ39wS79imQ1WiohN3QYkHQ8cHhGfzJ8/DkyIiL/uVm8qMDV/fDvwWB9D7QD89g02dzDGGuh4jjX04jnW0IvXn1hvi4iO3lQcLHv+q4BdK59HA6u7V4qI6cD0/gaRtDAixvd3+sEaa6DjOdbQi+dYQy9eq2MNihO+wAJgrKTdJW0BnAjM3sRtMjNrW4Nizz8i1ks6A5gDDANmRMSyTdwsM7O2NSiSP0BE3Arc2uIw/T5kNMhjDXQ8xxp68Rxr6MVraaxBccLXzMwG1mA55m9mZgPIyd/MrEBO/mZmBRo0J3zbgaSdSFcrB7A6Ip5pcbxRQETEc62MswliDdj32K6xcry2/M2sHm1/wncgVkpJ44DLgJHAr3PxaOB54DMR8WCNsXYDvgocmucvYFvgTmBaRDw5FGPleAP5PbZrrLb9zXK8kcBkKn/TwJyIeL7OOJuCpL1I9zSrLtvsiOj7PVN6IyLa8gWMA+YDy4Gf5NejuWy/mmMtAg5oUD4ReLjmWPcCJwDDKmXDSBfGzR+qsTbB99iusdr5NzsZ+CVwKfCF/Losl51c97LlmHsBZwMXA1/Pw3u3IM7Z+bucBnwsv6Z1lbVi2dp2z1/SIuCvIuK+buUTgW9FxL41xloREWN7GNcZEXsMUKwexw32WL2IN5DfY7vGGuq/2WOkDc3z3cq3B+6LiD3ripXnezZwEjCLdAsaSP/VnAjMiogLaoz1C+AdEfH/upVvASyr+3eD9j7mP6J74geIiPmSRtQc6zZJPwauZMPdSXcl7an8W82xHpB0CTCzW6wpwENDOBYM7PfYrrHa+TcT6XBId6/kcXU7jcYJ+V+AZUBtyZ+0DG8FftWtfOc8rnbtvOd/MfDnNF4pn4iIM2qOdwQbjteJtKcwO9KVy3XG2YK0UlZjrQRuBi6PiD8OxViVmAPyPbZrrHb+zSRNAb4I3M6Gv+ndgL8AzouIK2qO9yjpbsO/6lb+NuD2iHh7jbEmA98AVvDaZdsDOCMi
6t6Qtm/yh4H94zaz1suHeA7ntX/Tc6IFPZoGOiFL2oz0YKvqsi2IiJfrjPNqvHZO/oOBpKmRbkU9ELE+FBED8tSRgYyV4w3k99iusdr2N2uVgU7IA6nIi7zyQ2EGLNwAxvpvbRoLBvZ7bNdYbfubSWrJRiYiXomI+RHxw4i4Pg8PaOKX1JINdpF7/pL+KiK+VfM89yLtHdwXEb+rlE9uwb+HE0gX7yzID7qfDDw6EIezJF0ZESe3Ok6OdRBpr2tpRNxe87wPAJZHxAuStiJ1q9uP9Nzof4yIdTXGOhO4ISJWNq38xmN1PQ9jdUT8RNJHgP9O6vI8vfvJy5pi/jnwYdI5tfWkwyTX1Pkd9qIN+0fEAwMY75aI6PvzQPsXa+eIeLr2+Raa/E+NiO/WOL8zgdNJf2DjgLMi4qY87sGI2K/GWOcCR5B6as0lPet4HnAY6djn+TXG6v5AHQEfIF0wREQcVVesHO/+iJiQh/8n6Tu9AZgE3Fxz17plwL6RniUxHXgJuJ50cdS+EfE/aoy1Dvg9qT/6NcB1EbGmrvl3i/V90rqxNelCq22AH5GWi4g4peZ4ZwJ/CfwUOJLUL/050sbgMxExr854g0WrEvKAasXFA4P9BTxV8/yWANvk4THAQtIGAOChFsQaRvrjfgHYNpdvBSyuOdaDwPeAg4H35/en8/D7W/C7PFQZXgB05OERwJKaYy2vLme3cYvqXi7SIdZJwOXAGlI3yCnAm2uOtTi/DweeIV/sRdpw17p+VNfHPLw1MC8P79aCdX8kqXvlo8B/5NfyXLZd3cs2WF7Aba2Yb9v285e0uKdRwE41hxsW+VBPRDwp6WDg+twlrO7jnusjHXN8SdIvI+KFHPcPkuruDzweOAv4PPC/I2KRpD9ExE9rjtNls9ybYzPSf6VrACLi95LW1xxraeU/wIcljY+IhZL2BOo+NBIR8Qqpi+LtkjYn/fd2EvA1oFcP3O6lzfKhnxGkZDwSWAtsCWxeY5yq4cDLOcabASLiqbycdbqW9F/nwRHxGwBJf0baiF5H6vJZG0nbAueQLuy6LSKuroy7JCI+U2Osno4OiHQ0oXZtm/xJCf5w0r+gVQLuqTnWbySNi4hFABHxO0kfAmYA76o51p8kbR0RLwH7dxXme57UmvxzwrpI0nX5/Rlau86MBB4gX8wj6c8i4jeStqH+jegnga9L+gLwW+BeSStJXfo+WXOs17Q90nH32cDsfL6hTpeT9oyHkTba10l6nHS7hVk1xwL4DrBA0nzgfcCFAJI6SBudOo2JiAurBXkjcKGkT9QcC+C7pPMXPwQ+IelY4CORrpWYWHOsBaRDZ43W8+1qjgW08TF/SZcD342If28w7uqI+EiNsUaT9sh/02DcgRHx8xpjbRkNLtSRtAOwc0QsqStWgxgfBA6MiL9vVYwe4m4N7BQRT7Rg3m8G/gtpo7YqWnPjvz0j4hd1z3cj8d4KEBGrJW1HOh/0VETc36J47wD2Jp2Yf7QVMXKc20n36JrZ9Tsp3bjxFOAvIuKwmuMtiohxlc+fJ53XOAqYG/Wey1sKfDgiVjQYtzIidq0r1qvzbdfkb2btJR8SnEa6cHPHXPwM6b+oC6LmC70kLSfd3uGVStkU4HOkc3xvqzHWcaTzWo81GHdMRNxYV6xX5+vkb2ZDXd09+PI8v0q6jcNPupVPBv5v1H+TvAHrLg5O/mbWBiQ9FRG7DWC8Idtd/NWYTv5mNhQ06cG3Z0RsOYBtqXVjI2kJ8J7cWWQM6ZqTqyLi65Ieioh31xWrSzv39jGz9jKQPfjaubs44ORvZkPHLaQTrYu6j5A0rwXx2rW7OODDPmZmDbVrd/FX5+vkb2ZWniJv6WxmVjonfzOzAjn5F0rSPEnjByDOmZKW51sN1znfcZKO7Gs9SUdJmlZnW7rFe6+kZZIWteC+PbYRA7VOtwsnf+szSX3pJfYZ4MiI+GjNzRhHus9Kn+pFxOyo8bkADXwU+FpEjIuIPzSr
LGlYC9syoPq4Xtim1or7RPtV2328x5Cu+Ps2sIx0S+Ct8rh5wPg8vAPwZB4+BbgRuBl4AjgD+FvSPeXnA6Mq0/8rqcvaUmBCLh9B6l62IE9zdGW+1+X53tmgrX+b57MU+JtcdhnwJ9I93/9Xt/p9aedrlhPYAniKdF/8RcAJpCd+3ZOnvwd4ew/1TgG+kef3NuAOYHF+3y2XXwFcnOfzOHBcLt8ZuDvPaynw3m7L9EnSnSyfAL5P6hL4z7nuEuCEXO9g4C7gauCRBt/lacAv8rJ/u9LeDtIdJhfk14G5/Ev5N5uX23tmZV4fA+7Pbf4W+d77G1nnalsv8jQ/Bh7O8+pa/i/m+SwFprOh48k84KL8HS8nPXbyR6Q7a36l8jfxKDAz/27XA1s3WFcmAfeSnklxHfl5G35Vfp9N3QC/NvLjpBV9PTAuf74W+Fgerq7o3ZN/J+m+6h3AOuBTedxFbEjM84Bv5+H3ke7ICPCPlRjb5SQ0Is93FTkpd2vn/qTkNoL05KhlwLvzuCeBHRpM05d29rSc36jMb1tgeB4+DPhhD/Ve/UxKWFPy8CeAG/PwFTlhbAbsA3Tm8s8Cn8/Dw2jwIJY8bdfG4ljS09aGkfqMP0XagBxMerLX7g2mf2v+zkaR7r//s0p7rwYOysO7kR9IQ0r+95Dup78D6SEnm5PutHkzsHmudwlwcpN1rs714tiueeXPI/P7qErZVcBfVmJfmIfPAlbn72vLHOMtpL+JYMOGbwbwd9V1JX8HdwMjcvnZwBc39d/zYHv537TB74nYcFHLA6SVv5m7IuJF4MX8CMGbc/kS4L9W6l0DEBF3S9o23/53EnCUpL/Ldd5ESjSQbmPb6B7tB5GeUft7AEk/At5L2kOso529MRKYKWksKTn05kEi7wG6Htd4FfDVyrgbI93N8ZF822BIe6sz8kNKbowGFxt1cxDpWbYvA89I+ilpb/YF4P5ofIvqCcBPu77n/CyFPfO4w4B9pFcv+Nw235Ia4MeRbvX9R0nPkjY2h5I2zAvyNFsBzzZpM9S3XiwBvibpQuCWiPhZLv+ApM+RHjYzirSz0PXbz65MuyzyoxLzMwl2JT2acmVs6Pf+PeBM0kNxukwkbbR/npd7C9J/AVbh5D/4Ve/d/zLpDxjSfwRd52zetJFpXql8foXX/ubdL/II0qGKY6PbrWWVHnj++x7a2N/Lz3vTzo0tZ9V5pI3Jh/O9Ueb1oz3V76PaNsGryfB9wAeBqyT9c0RcuZH5bex76c93uRnp/i+vOZeQE1z39WR4ntfMiDhnI/NspJb1IiJ+IWl/0jmXf8r34/8q6T+Q8RGxUtKXeO3vWl0Huq8fXetEo/a9plmkDdJJjRfPwCd8h7In2fAkr+P6OY8TACQdBKyLiHXAHOCvlTOKpN7cUOpu4BhJW0saQXp498+aTNNbT9J4OV8kPzIwGwn8Og+fspF6VfcAJ+bhjwKvu5KzKt9n5dmI+DbpiVnN7rR4N3CCpGH5yVbvIx1/35j7gfdL2j6fQD22Mu520rmRrvY0e7zfHcBxknbM9UflZWimlvUiP1TmpYj4HmnPfD82JPrfKj2hrT/r7m6S3pOHT+L1v9t84EBJe+R2bK30eE6rcPIfur4GfFrSPaRjnP3xXJ7+MtJJRkh70JsDi/PThc5rNpOIeJB0rPt+4D7gOxHR7JBPb/W0nHeRDoEsknQCaY/ynyT9nHSMvad6VWcCp+YbeH2cdJx5Yw4GFkl6iJSUv96k/g2kk5IPk549+7locPl+VUT8mnR8/T7SU6seIZ0P6WrveEmLJT0CfKrJvB4BvkB6bvBi0vmHnZu0GWpaL0j3pLlf0iLSIyW/EhHPk05iLyGd8F/Qi/l0txyYkpdpFHBpdWSkZz+fAlyT68wH9upHnLbm2zuYDTKStol0Y6/hpA3IjIi4YYBizyOdQF04EPH6Kh/SuyUi3rmJmzLkec/fbPD5Ut5bXkrqNlr7I/zMvOdvZlYg7/mb
mRXIyd/MrEBO/mZmBXLyNzMrkJO/mVmB/j+3tBI+8tjiCgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "fig, ax = plt.subplots()\n",
    "\n",
    "fig.tight_layout()\n",
    "\n",
    "df_long.value.value_counts().sort_index()[1:].plot.bar()\n",
    "plt.xlabel('number of mutations for gene  per sample')\n",
    "plt.subplots_adjust(bottom=0.2, left=0.2)\n",
    "\n",
    "plt.savefig('mutation distribution', dpi=200)\n",
    "# plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2359"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df_long.value > 1.)/ "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.01622849724115547"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "100*sum(df_long.value > 1.)/ float(df_long.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:min_env]",
   "language": "python",
   "name": "conda-env-min_env-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
